# -*- coding: utf-8 -*-
"""
Created on Sat May 30 08:27:26 2020

@author: zh
"""
import pandas as pd
# Load the corpus; column names are supplied explicitly via `names`.
df_news = pd.read_csv(
    'data/corpus_clean.csv',
    names=['category', 'content'],
    encoding='utf-8',
)
def drop_space(contents):
    """Remove blank entries from every row of *contents*.

    Each row is iterated element by element and any element whose string
    form is empty or whitespace-only is dropped.  Note that when a row is
    a plain string, iteration is per character, so the result for that row
    is a list of its non-whitespace characters.

    Args:
        contents: An iterable of rows.  Each row is itself an iterable
            (e.g. a string or a list of tokens).  Missing rows (``None``
            or a float NaN, which is how pandas represents an empty CSV
            cell) are accepted and yield an empty list.

    Returns:
        A list with one list per input row, holding the elements that
        survived the filter, in their original order.
    """
    contents_clean = []
    for line in contents:
        # A missing cell is not iterable; treat it as a row with no
        # entries instead of failing with a TypeError.  ``line != line``
        # is true only for NaN.
        if line is None or (isinstance(line, float) and line != line):
            contents_clean.append([])
            continue
        contents_clean.append([word for word in line if str(word).strip()])
    return contents_clean
        

# Filter blank entries out of every row of the content column and store
# the cleaned rows back on the frame.
contents_clean = drop_space(df_news['content'])
df_news['content'] = contents_clean

# Preview the first rows, then write the frame out without a header row.
print(df_news.head())
df_news.to_csv(
    'data/corpus_clean0.csv',
    header=False,
    encoding='utf-8',
)
